********************************************************************************
****    Calculate the Bootstrapped standard errors of income percentiles   *****  
********************************************************************************
***************     Table 2 and Appendix Table 8-10       **********************


set matsize 800
capture program drop myboot
* myboot: one bootstrap replication for Table 2 and Appendix Tables 8-10.
*   - reloads the monthly file, draws a bootstrap sample (strata = strataid,
*     clusters = original household serial), rebuilds family units, poverty
*     thresholds, subgroup indicators and the fixed demographic weights;
*   - returns in r() the weighted 25th/50th/75th percentiles of ifam_inc1:
*       r(pXX_m)    full sample, month m = 1..6 of 2020
*       r(pXX_mij)  month m, subgroup variable gr-i (i = 2..9), category j
*       r(pXXc_m), r(pXXc_mij)  same, after pooling Jan+Feb (m = 1) and
*                                Apr+May+Jun (m = 4); March is dropped.
* Takes no arguments; it clears the data in memory and changes the working directory.
program define myboot, rclass
clear
* Locate the project folder.  The Windows folder is tried first; if it cannot be
* entered, fall back to the cluster (AFS) folder.  The second -cd- is not captured,
* so the program still stops with an error when neither folder is available.
* The path is quoted so that folders containing spaces work.
local projectfolder "C:\Users\worri\Dropbox\Poverty\COVID19\"
capture cd "`projectfolder'"
if _rc {
    local projectfolder "/afs/crc.nd.edu/user/j/jhan4/stata/COVID19/"
    cd "`projectfolder'"
}
use Monthly_Poverty_0520.dta

gen yr_month=year*100+month
keep if yr_month>=202001
** replace asecflag 2 => asecflag .
replace asecflag=. if asecflag==2
** drop irrelevant variables
drop month hflag qinc* qoinc* cpsid cpsidp durunemp whyunemp whyabsnt wnlook wkstat wksworkorg occ ind classwkr earnwt earnweek inctot


** Strata: yr_month, ASECflag
** Cluster unit: household level
* NOTE(review): ASECflag is set only where asecflag is missing, so strataid is
* missing for any row with a non-missing asecflag - confirm how -bsample- should
* treat those rows.
gen ASECflag = 2 if asecflag==.
gen strataid = yr_month*10+ASECflag
* Keep the original household id (needed for the merge with impute below);
* -bsample- writes a new id for each resampled cluster into serial.
rename serial org_serial
bsample, strata(strataid) cluster(org_serial) idcluster(serial)
drop strataid


********************************************************************************
******   Construct a family unit variable for Poverty measures, famnum    ******
** For the poverty measure, a primary family and related family is considered as one family

/* Assign 1 to primary+related families */
gen famnum = ftype==1|ftype==3

/* Assign 2-4 to unrelated subfamilies */
sort yr_month asecflag serial ftype pernum
* Each subfamily has a reference person  (famrel==1)
* Calculate # subfamilies in a household (Max 3 in this sample period)
bysort yr_month asecflag serial: egen tot_subfam = sum(ftype==4 & famrel==1)
tab tot_subfam if ftype==4
* NOTE(review): after sorting on famrel, all reference persons (famrel==1) in a
* household-ftype cell precede the other members, so with two or more unrelated
* subfamilies every non-reference member receives the highest subfamily number.
* Confirm this is intended.
sort yr_month asecflag serial ftype famrel
bysort yr_month asecflag serial ftype: gen subfamnum = sum(famrel==1)
tab subfamnum if ftype==4
* Assign 2-4 to the 1st-3rd unrelated subfamily
replace famnum= subfamnum+1 if ftype==4
drop subfamnum


/* Assign 5-20 to non-family members (ftype=1,2 or famrel=0) */
* Calculate # non-family members within a household (Max 16 in this sample period)
bysort yr_month asecflag serial: gen non_fam = sum(famrel==0)
tab non_fam
* Each non-family member gets an own famnum (running count + 4)
replace famnum= non_fam+4 if famrel==0
drop non_fam

* Construct demographic variables related to the poverty threshold
* 1) family size
bysort yr_month asecflag serial famnum: gen fam_size = _N
* 2) related children under age 18 (excluding HH head or spouse)
bysort yr_month asecflag serial famnum: egen nchild = sum(age<18 & relate>=301)
* 3) the head is elderly (>=65)
bysort yr_month asecflag serial famnum: egen head_elderly = max(age>=65 & relate==101)
* 4) families where every member counts as a child: treat one member as the adult
replace nchild=nchild-1 if fam_size==nchild


* Assign poverty thresholds from the ASEC data to the relevant dem. group in the Monthly CPS
merge m:1 year fam_size nchild head_elderly using pov_thresh
keep if _merge==3
drop _merge


*5) merge this household level data to the original individual level data (need to use the original serial)
rename serial new_serial
rename org_serial serial
merge m:1 yr_month asecflag serial using impute
keep if _merge==3
drop _merge
* From here on serial is again the bootstrap cluster id
drop serial
rename new_serial serial


********************************************************************************
keep if yr_month>=202001

* Equivalence scale: (adults + 0.7*children)^0.7
gen scale=((fam_size-nchild+.7*nchild)^.7)

* Construct variables for subgroup analysis

*1) Full sample
gen gr1=1

*2) Age subgroup (1: under 18, 2: 18-64, 3: 65+)
gen gr2= age<18
replace gr2 = 2 if age>=18 & age<65
replace gr2 = 3 if age>=65

*3) Race subgroup
gen gr3 = race==100             // white
replace gr3 = 2 if race==200    // black
replace gr3 = 3 if race!=100 & race!=200

*4) gender subgroup (1: sex==1, 2: otherwise)
gen gr4 = sex==1
replace gr4 = 2 if gr4==0

*5) Education subgroup
*Head's education (1: edu<=73, 2: otherwise)
bysort yr_month asecflag serial famnum: egen head_hs_less = max(relate==101 & edu<=73)
gen gr5 = head_hs_less==1
replace gr5 = 2 if head_hs_less==0

* Unlike the other merges, unmatched rows are not dropped here
merge m:1 statefip using st_covid.dta
drop _merge

********************************************************************************
*************          Combine with the State COVID info         ***************
* For gr6-gr9, category 2 collects every row whose indicator is not equal to 1
*6) COVID death rate
gen gr6 = high_dr==1
replace gr6 = 2 if gr6==0
*7) Stay at Home order
gen gr7= early_sh==1
replace gr7 = 2 if gr7==0
*8) State of Emergency order
gen gr8 = early_soe==1
replace gr8 = 2 if gr8==0
*9) UI recipiency rate
gen gr9 = high_ui==1
replace gr9 = 2 if gr9==0


********************************************************************************
* Missing incomes or CPS-imputed income
gen miss_inc = (faminc>=995 & faminc<=999)|(qfaminc>0 & qfaminc!=.)

*** Restrict the sample to householder's family
bysort yr_month asecflag serial famnum: egen hfam = sum(relate==101)
keep if hfam==1

*** Restrict the sample to individuals in the 1st or 5th month interview
keep if mish==1|mish==5

*** Restrict the sample to individuals with non-imputed income
keep if miss_inc!=1


****************************************************************
********************************************************************************
**  Construct Fixed Demographic Weight (Using the composition in Jan & Feb 2020)
// Famtype : 1) Single parent 2) Married parent, 3) Single Individuals, 4) Married w/o children, 5) Head 65+
// Head age: 1) age 39 or less, 2) age 40-64, 3) age 65 or more
// Head edu: 1) less than HS, 2) HS and some college, 3) Bachelor
// treat individuals outside the householder's family as a separate group

*1) Family Type
* Head's marital status
bysort yr_month asecflag serial famnum: egen head_married = max((marst==1|marst==2) & relate==101)
gen famtype = 0
replace famtype = 1 if head_elderly==0 & head_married==0 & nchild>=1 //Single Parent
replace famtype = 2 if head_elderly==0 & head_married==1 & nchild>=1 //Married Parent
replace famtype = 3 if head_elderly==0 & head_married==0 & nchild==0 //Single Individuals
replace famtype = 4 if head_elderly==0 & head_married==1 & nchild==0 //Married w/o Children
replace famtype = 5 if head_elderly==1                               //Head 65 and Over
*2) Age (39-, 2: 40-64, 3:65+)
bysort yr_month asecflag serial famnum: egen head_under40 = max(age<=39 & relate==101)
bysort yr_month asecflag serial famnum: egen head_4064 = max((age>=40 & age<=64) & relate==101)
gen agegr= head_under40==1
replace agegr = 2 if head_4064==1
replace agegr = 3 if head_elderly==1
*3) Education
*Head's education
bysort yr_month asecflag serial famnum: egen head_less_hs = max(relate==101 & edu<=72)
bysort yr_month asecflag serial famnum: egen head_hs_sc = max(relate==101 & (edu>=73 & edu<=110))
bysort yr_month asecflag serial famnum: egen head_bachelor = max(relate==101 & edu>=111)
gen edugr = head_less_hs==1
replace edugr = 2 if head_hs_sc==1
replace edugr = 3 if head_bachelor==1

* One code per family type x head age x head education cell
gen group = 100*famtype+10*agegr+edugr
bysort yr_month: tab group [aw=wtfinl]

* Fix the demographic composition constant in Jan and Feb 2020
* jan_frac = weighted share of each group in the pooled Jan+Feb 2020 sample
preserve
keep if yr_month==202001|yr_month==202002
egen totpop = sum(wtfinl)
bysort group: egen grpop = sum(wtfinl)
gen jan_frac = grpop/totpop
collapse jan_frac, by (group)
tempfile jan_frac
save `jan_frac', replace
restore

* Groups that do not appear in Jan/Feb 2020 are dropped
merge m:1 group using `jan_frac'
keep if _merge==3
drop _merge

* Rescale weights so each month's weighted group shares equal the Jan+Feb shares
bysort yr_month: egen totpop = sum(wtfinl)
bysort yr_month group: egen grpop = sum(wtfinl)
gen frac = grpop/totpop
bysort yr_month group: gen fixedwgt = wtfinl*(jan_frac/frac)
* Diagnostics: within a month-group cell fixedfrac should sum to jan_frac
bysort yr_month group: gen fixedfrac = (wtfinl/grpop)*(jan_frac)
bysort yr_month group: egen sum_fixedfrac = sum(fixedfrac)
bysort yr_month group: sum fixedwgt
bysort yr_month group: sum sum_fixedfrac
** for March 2020 onward, use the fixed demographic weight
replace wtfinl = fixedwgt if yr_month>=202003


********************************************************************************
***  Poverty, Income, Employment variables for 1st and 5th month interview   ***

gen pov1= ifam_inc<pov_thresh
gen ifam_inc1=ifam_inc
merge m:1 yr_month using PCE
keep if _merge==3
drop _merge
** equivalent scale adjustment & 2 adults and 2 children
* 2.355 is approximately (2+0.7*2)^0.7, the scale of a 2-adult, 2-child family
replace ifam_inc1 = PCE*2.355*ifam_inc1/scale




********************************************************************************
*************               Calculate Standard Errors              *************
* One -summarize, detail- per cell provides all three percentiles:
* -return scalar- leaves the r() results of -summarize- in place.

* Full sample
forvalues m = 1/6 {
    summarize ifam_inc1 [aweight=wtfinl] if yr_month==20200`m', detail
    return scalar p25_`m' = r(p25)
    return scalar p50_`m' = r(p50)
    return scalar p75_`m' = r(p75)
}

* Subgroup with two categories
forvalues m = 1/6 {
    forvalues i = 2/9 {
        forvalues j = 1/2 {
            summarize ifam_inc1 [aweight=wtfinl] if yr_month==20200`m' & gr`i'==`j', detail
            return scalar p25_`m'`i'`j' = r(p25)
            return scalar p50_`m'`i'`j' = r(p50)
            return scalar p75_`m'`i'`j' = r(p75)
        }
    }
}

* Subgroup with three categories (third category of gr2 and gr3)
forvalues m = 1/6 {
    forvalues i = 2/3 {
        summarize ifam_inc1 [aweight=wtfinl] if yr_month==20200`m' & gr`i'==3, detail
        return scalar p25_`m'`i'3 = r(p25)
        return scalar p50_`m'`i'3 = r(p50)
        return scalar p75_`m'`i'3 = r(p75)
    }
}



** Pool Jan. and Feb. data (coded 202001), and April, May and June data (coded 202004)
drop if yr_month==202003
replace yr_month=202001 if yr_month==202002
replace yr_month=202004 if yr_month==202005|yr_month==202006


foreach m in 1 4 {
    summarize ifam_inc1 [aweight=wtfinl] if yr_month==20200`m', detail
    return scalar p25c_`m' = r(p25)
    return scalar p50c_`m' = r(p50)
    return scalar p75c_`m' = r(p75)
}

* Subgroup with two categories
foreach m in 1 4 {
    forvalues i = 2/9 {
        forvalues j = 1/2 {
            summarize ifam_inc1 [aweight=wtfinl] if yr_month==20200`m' & gr`i'==`j', detail
            return scalar p25c_`m'`i'`j' = r(p25)
            return scalar p50c_`m'`i'`j' = r(p50)
            return scalar p75c_`m'`i'`j' = r(p75)
        }
    }
}

* Subgroup with three categories (third category of gr2 and gr3)
foreach m in 1 4 {
    forvalues i = 2/3 {
        summarize ifam_inc1 [aweight=wtfinl] if yr_month==20200`m' & gr`i'==3, detail
        return scalar p25c_`m'`i'3 = r(p25)
        return scalar p50c_`m'`i'3 = r(p50)
        return scalar p75c_`m'`i'3 = r(p75)
    }
}

end
* Run 200 bootstrap replications of -myboot- (seed 1234).
* The expression list is assembled in a local macro instead of being typed out.
* Variable naming: p<pct><m>[<i><j>]
*   <pct> = 25, 50 or 75
*   <m>   = 1..6 month of 2020; 7 = pooled-period difference, r(..c_4..) - r(..c_1..)
*   <i><j> = subgroup variable gr-i and its category j (absent for the full sample)
* The 25th percentile covers gr2-gr9; the 50th and 75th cover gr2-gr5 only.
* gr2 and gr3 have three categories, all other subgroup variables have two.
local explist

* Monthly levels: full sample first, then every subgroup, percentile by percentile
foreach p in 25 50 75 {
    local lastgr = cond(`p'==25, 9, 5)
    forvalues m = 1/6 {
        local explist `"`explist' p`p'`m' = r(p`p'_`m')"'
    }
    forvalues m = 1/6 {
        forvalues i = 2/`lastgr' {
            local ncat = cond(`i'<=3, 3, 2)
            forvalues j = 1/`ncat' {
                local explist `"`explist' p`p'`m'`i'`j' = r(p`p'_`m'`i'`j')"'
            }
        }
    }
}

* Differences between the pooled periods returned by -myboot-
foreach p in 25 50 75 {
    local lastgr = cond(`p'==25, 9, 5)
    local explist `"`explist' p`p'7 = (r(p`p'c_4)-r(p`p'c_1))"'
    forvalues i = 2/`lastgr' {
        local ncat = cond(`i'<=3, 3, 2)
        forvalues j = 1/`ncat' {
            local explist `"`explist' p`p'7`i'`j' = (r(p`p'c_4`i'`j')-r(p`p'c_1`i'`j'))"'
        }
    }
}

simulate `explist', reps(200) seed(1234): myboot


* Bootstrap standard error = standard deviation of each statistic across replications
collapse (sd) p25* p50* p75*
gen id = _n
* Long form: one row per variable suffix; the suffix is stored in month
reshape long p25 p50 p75, i(id) j(month)
* First digit of the suffix = period (1-6 months, 7 = difference);
* remaining digits = subgroup code (missing for the full sample)
gen mo = real(substr(string(month), 1, 1))
gen gr = real(substr(string(month), 2, 2))
keep p25 p50 p75 gr mo
* Wide form: one row per subgroup, one column per period
reshape wide p25 p50 p75, i(gr) j(mo)
* Full-sample row gets code 1
replace gr = 1 if gr==.
sort gr
keep gr p25* p50* p75*
order gr p25* p50* p75*

* The projectfolder macro is local to -myboot- and is therefore empty at this point
* when the file is run with -do-; the exports below then resolved relative to the
* working directory that -myboot- set with -cd-.  Make that fallback explicit so the
* output location no longer depends on an undefined macro.
if `"`projectfolder'"' == "" {
    local projectfolder `"`c(pwd)'/"'
}

* After sorting, rows 1-11 are the full sample and gr 21-52; rows 12+ are gr 61-92
preserve
keep gr p25*
keep if _n<=11
export excel using "`projectfolder'Table2_BSE.xls", firstrow(variables) replace
restore

preserve
keep gr p25*
keep if _n>=12
export excel using "`projectfolder'App.T10_BSE.xls", firstrow(variables) replace
restore


preserve
keep gr p50*
keep if _n<=11
export excel using "`projectfolder'App.T8_BSE.xls", firstrow(variables) replace
restore


preserve
keep gr p75*
keep if _n<=11
export excel using "`projectfolder'App.T9_BSE.xls", firstrow(variables) replace
restore

********************************************************************************
*******    Appendix Table 7: Original Weight, 5th month interview      *********
********************************************************************************


set matsize 800
capture program drop myboot
* myboot (Appendix Table 7 version): one bootstrap replication using the original
* weights and the 5th-month-interview sample only.
*   - reloads the monthly file, draws a bootstrap sample (strata = strataid,
*     clusters = original household serial), rebuilds family units, poverty
*     thresholds and subgroup indicators;
*   - returns in r() the weighted 25th percentile of ifam_inc2:
*       r(p25_m)    full sample, month m = 1..6 of 2020
*       r(p25_mij)  month m, subgroup variable gr-i (i = 2..9), category j
*       r(p25c_m), r(p25c_mij)  same, after pooling Jan+Feb (m = 1) and
*                                Apr+May+Jun (m = 4); March is dropped.
* Takes no arguments; it clears the data in memory and changes the working directory.
program define myboot, rclass
clear
* Locate the project folder.  The cluster (AFS) folder is tried first; if it cannot
* be entered, fall back to the Windows folder.  The second -cd- is not captured,
* so the program still stops with an error when neither folder is available.
* The path is quoted so that folders containing spaces work.
local projectfolder "/afs/crc.nd.edu/user/j/jhan4/stata/COVID19/"
capture cd "`projectfolder'"
if _rc {
    local projectfolder "C:\Users\worri\Dropbox\Poverty\COVID19\"
    cd "`projectfolder'"
}
use Monthly_Poverty_0520.dta

gen yr_month=year*100+month
keep if yr_month>=202001
** replace asecflag 2 => asecflag .
replace asecflag=. if asecflag==2
** drop irrelevant variables
drop month hflag qinc* qoinc* cpsid cpsidp durunemp whyunemp whyabsnt wnlook wkstat wksworkorg occ ind classwkr earnwt earnweek inctot


** Strata: yr_month, ASECflag
** Cluster unit: household level
* NOTE(review): ASECflag is set only where asecflag is missing, so strataid is
* missing for any row with a non-missing asecflag - confirm how -bsample- should
* treat those rows.
gen ASECflag = 2 if asecflag==.
gen strataid = yr_month*10+ASECflag
* Keep the original household id (needed for the merge with impute below);
* -bsample- writes a new id for each resampled cluster into serial.
rename serial org_serial
bsample, strata(strataid) cluster(org_serial) idcluster(serial)
drop strataid


********************************************************************************
******   Construct a family unit variable for Poverty measures, famnum    ******
** For the poverty measure, a primary family and related family is considered as one family

/* Assign 1 to primary+related families */
gen famnum = ftype==1|ftype==3

/* Assign 2-4 to unrelated subfamilies */
sort yr_month asecflag serial ftype pernum
* Each subfamily has a reference person  (famrel==1)
* Calculate # subfamilies in a household (Max 3 in this sample period)
bysort yr_month asecflag serial: egen tot_subfam = sum(ftype==4 & famrel==1)
tab tot_subfam if ftype==4
* NOTE(review): after sorting on famrel, all reference persons (famrel==1) in a
* household-ftype cell precede the other members, so with two or more unrelated
* subfamilies every non-reference member receives the highest subfamily number.
* Confirm this is intended.
sort yr_month asecflag serial ftype famrel
bysort yr_month asecflag serial ftype: gen subfamnum = sum(famrel==1)
tab subfamnum if ftype==4
* Assign 2-4 to the 1st-3rd unrelated subfamily
replace famnum= subfamnum+1 if ftype==4
drop subfamnum


/* Assign 5-20 to non-family members (ftype=1,2 or famrel=0) */
* Calculate # non-family members within a household (Max 16 in this sample period)
bysort yr_month asecflag serial: gen non_fam = sum(famrel==0)
tab non_fam
* Each non-family member gets an own famnum (running count + 4)
replace famnum= non_fam+4 if famrel==0
drop non_fam

* Construct demographic variables related to the poverty threshold
* 1) family size
bysort yr_month asecflag serial famnum: gen fam_size = _N
* 2) related children under age 18 (excluding HH head or spouse)
bysort yr_month asecflag serial famnum: egen nchild = sum(age<18 & relate>=301)
* 3) the head is elderly (>=65)
bysort yr_month asecflag serial famnum: egen head_elderly = max(age>=65 & relate==101)
* 4) families where every member counts as a child: treat one member as the adult
replace nchild=nchild-1 if fam_size==nchild


* Assign poverty thresholds from the ASEC data to the relevant dem. group in the Monthly CPS
merge m:1 year fam_size nchild head_elderly using pov_thresh
keep if _merge==3
drop _merge


*5) merge this household level data to the original individual level data (need to use the original serial)
rename serial new_serial
rename org_serial serial
merge m:1 yr_month asecflag serial using impute
keep if _merge==3
drop _merge
* From here on serial is again the bootstrap cluster id
drop serial
rename new_serial serial


********************************************************************************
keep if yr_month>=202001

* Equivalence scale: (adults + 0.7*children)^0.7
gen scale=((fam_size-nchild+.7*nchild)^.7)

* Construct variables for subgroup analysis

*1) Full sample
gen gr1=1

*2) Age subgroup (1: under 18, 2: 18-64, 3: 65+)
gen gr2= age<18
replace gr2 = 2 if age>=18 & age<65
replace gr2 = 3 if age>=65

*3) Race subgroup
gen gr3 = race==100             // white
replace gr3 = 2 if race==200    // black
replace gr3 = 3 if race!=100 & race!=200

*4) gender subgroup (1: sex==1, 2: otherwise)
gen gr4 = sex==1
replace gr4 = 2 if gr4==0

*5) Education subgroup
*Head's education (1: edu<=73, 2: otherwise)
bysort yr_month asecflag serial famnum: egen head_hs_less = max(relate==101 & edu<=73)
gen gr5 = head_hs_less==1
replace gr5 = 2 if head_hs_less==0

* Unlike the other merges, unmatched rows are not dropped here
merge m:1 statefip using st_covid.dta
drop _merge

********************************************************************************
*************          Combine with the State COVID info         ***************
* For gr6-gr9, category 2 collects every row whose indicator is not equal to 1
*6) COVID death rate
gen gr6 = high_dr==1
replace gr6 = 2 if gr6==0
*7) Stay at Home order
gen gr7= early_sh==1
replace gr7 = 2 if gr7==0
*8) State of Emergency order
gen gr8 = early_soe==1
replace gr8 = 2 if gr8==0
*9) UI recipiency rate
gen gr9 = high_ui==1
replace gr9 = 2 if gr9==0


********************************************************************************
* Missing incomes or CPS-imputed income
gen miss_inc = (faminc>=995 & faminc<=999)|(qfaminc>0 & qfaminc!=.)

*** Restrict the sample to householder's family
bysort yr_month asecflag serial famnum: egen hfam = sum(relate==101)
keep if hfam==1

*** Restrict the sample to individuals in the 5th month interview
keep if mish==5

*** Restrict the sample to individuals with non-imputed income
keep if miss_inc!=1

********************************************************************************
***      Poverty and income variables for the 5th month interview sample     ***
* wtfinl is used as loaded: no fixed demographic reweighting in this version

gen pov2= ifam_inc<pov_thresh
gen ifam_inc2=ifam_inc

merge m:1 yr_month using PCE
keep if _merge==3
drop _merge
** equivalent scale adjustment & 2 adults and 2 children
* 2.355 is approximately (2+0.7*2)^0.7, the scale of a 2-adult, 2-child family
replace ifam_inc2 = PCE*2.355*ifam_inc2/scale




********************************************************************************
*************               Calculate Standard Errors              *************
* Full sample
forvalues m = 1/6 {
    summarize ifam_inc2 [aweight=wtfinl] if yr_month==20200`m', detail
    return scalar p25_`m' = r(p25)
}

* Subgroup with two categories
forvalues m = 1/6 {
    forvalues i = 2/9 {
        forvalues j = 1/2 {
            summarize ifam_inc2 [aweight=wtfinl] if yr_month==20200`m' & gr`i'==`j', detail
            return scalar p25_`m'`i'`j' = r(p25)
        }
    }
}

* Subgroup with three categories (third category of gr2 and gr3)
forvalues m = 1/6 {
    forvalues i = 2/3 {
        summarize ifam_inc2 [aweight=wtfinl] if yr_month==20200`m' & gr`i'==3, detail
        return scalar p25_`m'`i'3 = r(p25)
    }
}



** Pool Jan. and Feb. data (coded 202001), and April, May and June data (coded 202004)
drop if yr_month==202003
replace yr_month=202001 if yr_month==202002
replace yr_month=202004 if yr_month==202005|yr_month==202006


foreach m in 1 4 {
    summarize ifam_inc2 [aweight=wtfinl] if yr_month==20200`m', detail
    return scalar p25c_`m' = r(p25)
}

* Subgroup with two categories
foreach m in 1 4 {
    forvalues i = 2/9 {
        forvalues j = 1/2 {
            summarize ifam_inc2 [aweight=wtfinl] if yr_month==20200`m' & gr`i'==`j', detail
            return scalar p25c_`m'`i'`j' = r(p25)
        }
    }
}

* Subgroup with three categories (third category of gr2 and gr3)
foreach m in 1 4 {
    forvalues i = 2/3 {
        summarize ifam_inc2 [aweight=wtfinl] if yr_month==20200`m' & gr`i'==3, detail
        return scalar p25c_`m'`i'3 = r(p25)
    }
}

end
* Run 200 bootstrap replications of -myboot- (seed 1234) for the 25th percentile.
* The expression list is assembled in a local macro instead of being typed out.
* Variable naming: p25<m>[<i><j>]
*   <m>    = 1..6 month of 2020; 7 = pooled-period difference, r(p25c_4..) - r(p25c_1..)
*   <i><j> = subgroup variable gr-i (2..5) and its category j (absent for the full sample)
* gr2 and gr3 have three categories, gr4 and gr5 have two.
local explist

* Monthly levels: full sample first, then the subgroups month by month
forvalues m = 1/6 {
    local explist `"`explist' p25`m' = r(p25_`m')"'
}
forvalues m = 1/6 {
    forvalues i = 2/5 {
        local ncat = cond(`i'<=3, 3, 2)
        forvalues j = 1/`ncat' {
            local explist `"`explist' p25`m'`i'`j' = r(p25_`m'`i'`j')"'
        }
    }
}

* Differences between the pooled periods returned by -myboot-
local explist `"`explist' p257 = (r(p25c_4)-r(p25c_1))"'
forvalues i = 2/5 {
    local ncat = cond(`i'<=3, 3, 2)
    forvalues j = 1/`ncat' {
        local explist `"`explist' p257`i'`j' = (r(p25c_4`i'`j')-r(p25c_1`i'`j'))"'
    }
}

simulate `explist', reps(200) seed(1234): myboot


* Bootstrap standard error = standard deviation of each statistic across replications
collapse (sd) p25*
gen id = _n
* Long form: one row per variable suffix; the suffix is stored in month
reshape long p25, i(id) j(month)
* First digit of the suffix = period (1-6 months, 7 = difference);
* remaining digits = subgroup code (missing for the full sample)
gen mo = real(substr(string(month), 1, 1))
gen gr = real(substr(string(month), 2, 2))
keep p25 gr mo
* Wide form: one row per subgroup, one column per period
reshape wide p25, i(gr) j(mo)
* Full-sample row gets code 1
replace gr = 1 if gr==.
sort gr
keep gr p25*
order gr p25*

* The projectfolder macro is local to -myboot- and is therefore empty at this point
* when the file is run with -do-; the export below then resolved relative to the
* working directory that -myboot- set with -cd-.  Make that fallback explicit so the
* output location no longer depends on an undefined macro.
if `"`projectfolder'"' == "" {
    local projectfolder `"`c(pwd)'/"'
}

export excel using "`projectfolder'App.T7_BSE.xls", firstrow(variables) replace

